Импорт phyloseq объекта и библиотек
Датасет - хроносерия разложения соломы - от фактор Day 10ти уровней
library(phyloseq)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggpubr)
library(ampvis2)
library(ANCOMBC)
library(heatmaply)
## Loading required package: plotly
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
##
## Loading required package: viridis
## Loading required package: viridisLite
## Registered S3 method overwritten by 'dendextend':
## method from
## rev.hclust vegan
## Registered S3 methods overwritten by 'registry':
## method from
## print.registry_field proxy
## print.registry_entry proxy
## Registered S3 method overwritten by 'seriation':
## method from
## reorder.hclust vegan
##
## ======================
## Welcome to heatmaply version 1.3.0
##
## Type citation('heatmaply') for how to cite the package.
## Type ?heatmaply for the main documentation.
##
## The github page is: https://github.com/talgalili/heatmaply/
## Please submit your suggestions and bug-reports at: https://github.com/talgalili/heatmaply/issues
## You may ask questions at stackoverflow, use the r and heatmaply tags:
## https://stackoverflow.com/questions/tagged/heatmaply
## ======================
library(WGCNA)
## Loading required package: dynamicTreeCut
## Loading required package: fastcluster
##
## Attaching package: 'fastcluster'
##
## The following object is masked from 'package:stats':
##
## hclust
##
##
##
## Attaching package: 'WGCNA'
##
## The following object is masked from 'package:stats':
##
## cor
ps.f <- readRDS("psf2")
#phyloseq object to ampvis2 object
#https://gist.github.com/KasperSkytte/8d0ca4206a66be7ff6d76fc4ab8e66c6
phyloseq_to_ampvis2 <- function(physeq) {
#check object for class
if(!any(class(physeq) %in% "phyloseq"))
stop("physeq object must be of class \"phyloseq\"", call. = FALSE)
#ampvis2 requires taxonomy and abundance table, phyloseq checks for the latter
if(is.null(physeq@tax_table))
stop("No taxonomy found in the phyloseq object and is required for ampvis2", call. = FALSE)
#OTUs must be in rows, not columns
if(phyloseq::taxa_are_rows(physeq))
abund <- as.data.frame(phyloseq::otu_table(physeq)@.Data)
else
abund <- as.data.frame(t(phyloseq::otu_table(physeq)@.Data))
#tax_table is assumed to have OTUs in rows too
tax <- phyloseq::tax_table(physeq)@.Data
#merge by rownames (OTUs)
otutable <- merge(
abund,
tax,
by = 0,
all.x = TRUE,
all.y = FALSE,
sort = FALSE
)
colnames(otutable)[1] <- "OTU"
#extract sample_data (metadata)
if(!is.null(physeq@sam_data)) {
metadata <- data.frame(
phyloseq::sample_data(physeq),
row.names = phyloseq::sample_names(physeq),
stringsAsFactors = FALSE,
check.names = FALSE
)
#check if any columns match exactly with rownames
#if none matched assume row names are sample identifiers
samplesCol <- unlist(lapply(metadata, function(x) {
identical(x, rownames(metadata))}))
if(any(samplesCol)) {
#error if a column matched and it's not the first
if(!samplesCol[[1]])
stop("Sample ID's must be in the first column in the sample metadata, please reorder", call. = FALSE)
} else {
#assume rownames are sample identifiers, merge at the end with name "SampleID"
if(any(colnames(metadata) %in% "SampleID"))
stop("A column in the sample metadata is already named \"SampleID\" but does not seem to contain sample ID's", call. = FALSE)
metadata$SampleID <- rownames(metadata)
#reorder columns so SampleID is the first
metadata <- metadata[, c(which(colnames(metadata) %in% "SampleID"), 1:(ncol(metadata)-1L)), drop = FALSE]
}
} else
metadata <- NULL
#extract phylogenetic tree, assumed to be of class "phylo"
if(!is.null(physeq@phy_tree)) {
tree <- phyloseq::phy_tree(physeq)
} else
tree <- NULL
#extract OTU DNA sequences, assumed to be of class "XStringSet"
if(!is.null(physeq@refseq)) {
#convert XStringSet to DNAbin using a temporary file (easiest)
fastaTempFile <- tempfile(pattern = "ampvis2_", fileext = ".fa")
Biostrings::writeXStringSet(physeq@refseq, filepath = fastaTempFile)
} else
fastaTempFile <- NULL
#load as normally with amp_load
ampvis2::amp_load(
otutable = otutable,
metadata = metadata,
tree = tree,
fasta = fastaTempFile
)
}
#variance stabilisation from DESeq2
ps_vst <- function(ps, factor){
require(DESeq2)
require(phyloseq)
diagdds = phyloseq_to_deseq2(ps, as.formula(paste( "~", factor)))
diagdds = estimateSizeFactors(diagdds, type="poscounts")
diagdds = estimateDispersions(diagdds, fitType = "local")
pst <- varianceStabilizingTransformation(diagdds)
pst.dimmed <- t(as.matrix(assay(pst)))
# pst.dimmed[pst.dimmed < 0.0] <- 0.0
ps.varstab <- ps
otu_table(ps.varstab) <- otu_table(pst.dimmed, taxa_are_rows = FALSE)
return(ps.varstab)
}
#WGCNA visualisation
#result - list class object with attributes:
# ps - phyloseq object
# amp - ampwis2 object
# heat - heatmap with absalute read numbers
# heat_rel - hetmap with relative abundances
# tree - phylogenetic tree with taxonomy
color_filt <- function(ps, df){
library(tidyverse)
library(reshape2)
library(gridExtra)
l = list()
for (i in levels(df$module)){
message(i)
color_name <- df %>%
filter(module == i) %>%
pull(asv) %>%
unique()
ps.col <- prune_taxa(color_name, ps)
amp.col <- phyloseq_to_amp(ps.col)
heat <- amp_heatmap(amp.col, tax_show = 60,
group_by = "Day",
tax_aggregate = "OTU",
tax_add = "Genus",
normalise=FALSE,
showRemainingTaxa = TRUE)
ps.rel <- phyloseq::transform_sample_counts(ps.col, function(x) x / sum(x) * 100)
amp.r <- phyloseq_to_ampvis2(ps.rel)
heat.rel <- amp_heatmap(amp.r, tax_show = 60,
group_by = "Day",
tax_aggregate = "OTU",
tax_add = "Genus",
normalise=FALSE,
showRemainingTaxa = TRUE)
tree <- ps.col@phy_tree
taxa <- as.data.frame(ps.col@tax_table@.Data)
p1 <- ggtree(tree) +
geom_tiplab(size=2, align=TRUE, linesize=.5) +
theme_tree2()
taxa[taxa == "Allorhizobium-Neorhizobium-Pararhizobium-Rhizobium"] <- "Allorhizobium"
taxa[taxa == "Burkholderia-Caballeronia-Paraburkholderia"] <- "Burkholderia"
tx <- taxa %>%
rownames_to_column("id") %>%
mutate(id = factor(id, levels = rev(get_taxa_name(p1)))) %>%
dplyr::select(-c(Kingdom, Species, Order)) %>%
melt(id.var = 'id')
p2 <- ggplot(tx, aes(variable, id)) +
geom_tile(aes(fill = value), alpha = 0.4) +
geom_text(aes(label = value), size = 2) +
theme_bw() +
theme(legend.position = "none",
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank())
p <- ggpubr::ggarrange(p1, p2, widths = c(0.9, 1))
l[[i]] <- list("ps" = ps.col,
"amp" = amp.col,
"heat" = heat,
"heat_rel" = heat.rel,
"tree" = p)
}
return(l)
}
Разделим датасет на две группы
1-я - в более чем 10% образцов должно быть хотя бы 10 ридов - эта группа пойдет в анализ далее
ps.inall <- phyloseq::filter_taxa(ps.f, function(x) sum(x > 10) > (0.1*length(x)), TRUE)
ps.inall
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 338 taxa and 35 samples ]
## sample_data() Sample Data: [ 35 samples by 2 sample variables ]
## tax_table() Taxonomy Table: [ 338 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 338 tips and 337 internal nodes ]
## refseq() DNAStringSet: [ 338 reference sequences ]
amp.inall <- phyloseq_to_ampvis2(ps.inall)
amp_heatmap(amp.inall,
tax_show = 40,
group_by = "Day",
tax_aggregate = "OTU",
tax_add = "Genus",
normalise=FALSE,
showRemainingTaxa = TRUE)
Вторая - оставшиеся, но более 100 ридов по всем образцам(далее -
вылетающие мажоры(ВМ))
Остальные филотипы выкидываем из анализа
ps.exl <- phyloseq::filter_taxa(ps.f, function(x) sum(x > 10) < (0.1*length(x)), TRUE)
ps.exl <- prune_taxa(taxa_sums(ps.exl) > 100, ps.exl)
amp.exl <- phyloseq_to_ampvis2(ps.exl)
amp_heatmap(amp.exl,
tax_show = 40,
group_by = "Day",
tax_aggregate = "OTU",
tax_add = "Genus",
normalise=FALSE,
showRemainingTaxa = TRUE)
То же, но c относительной представленностью.
ps.per <- phyloseq::transform_sample_counts(ps.f, function(x) x / sum(x) * 100)
ps.exl.taxa <- taxa_names(ps.exl)
ps.per.exl <- prune_taxa(ps.exl.taxa, ps.per)
amp.exl.r <- phyloseq_to_ampvis2(ps.per.exl)
amp_heatmap(amp.exl.r, tax_show = 60,
group_by = "Day",
tax_aggregate = "OTU",
tax_add = "Genus",
round = 2,
normalise=FALSE,
showRemainingTaxa = TRUE)
amp_heatmap(amp.exl.r, tax_show = 60,
group_by = "Day",
tax_aggregate = "OTU",
tax_add = "Genus",
round = 2,
normalise=FALSE, )
ВМ объединенные по родам. Относительная представленность.
amp_heatmap(amp.exl.r,
tax_show = 30,
group_by = "Day",
tax_aggregate = "Genus",
tax_add = "Phylum",
tax_class = "Proteobacteria",
round = 2,
normalise=FALSE,
showRemainingTaxa = TRUE)
Если посмотреть, если как распределяются ВМ по дням - похоже это случай
phyloseq::sample_sums(ps.per.exl) %>%
as.data.frame(col.names = "values") %>%
setNames(., nm = "values") %>%
rownames_to_column("samples") %>%
mutate(Day = sapply(strsplit(samples, "-"), `[`, 3)) %>%
ggplot(aes(x=Day, y=values, color=Day)) +
geom_boxplot(aes(color=Day)) +
geom_point(color = "black", position = position_dodge(width=0.2)) +
theme_bw() +
theme(legend.position = "none")